library(AmesHousing)
# Build the Ames housing data set and keep it under the name `ames`
ames <- make_ames()
# Typing the object name prints it
ames
# head() gives the first six observations
# good for checking that all variables you want are in the data set and for looking at the type of each variable
head(ames)
# str() lists every variable with its type and its first few values
str(ames)
tibble [2,930 × 81] (S3: tbl_df/tbl/data.frame)
$ MS_SubClass : Factor w/ 16 levels "One_Story_1946_and_Newer_All_Styles",..: 1 1 1 1 6 6 12 12 12 6 ...
$ MS_Zoning : Factor w/ 7 levels "Floating_Village_Residential",..: 3 2 3 3 3 3 3 3 3 3 ...
$ Lot_Frontage : num [1:2930] 141 80 81 93 74 78 41 43 39 60 ...
$ Lot_Area : int [1:2930] 31770 11622 14267 11160 13830 9978 4920 5005 5389 7500 ...
$ Street : Factor w/ 2 levels "Grvl","Pave": 2 2 2 2 2 2 2 2 2 2 ...
$ Alley : Factor w/ 3 levels "Gravel","No_Alley_Access",..: 2 2 2 2 2 2 2 2 2 2 ...
$ Lot_Shape : Factor w/ 4 levels "Regular","Slightly_Irregular",..: 2 1 2 1 2 2 1 2 2 1 ...
$ Land_Contour : Factor w/ 4 levels "Bnk","HLS","Low",..: 4 4 4 4 4 4 4 2 4 4 ...
$ Utilities : Factor w/ 3 levels "AllPub","NoSeWa",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Lot_Config : Factor w/ 5 levels "Corner","CulDSac",..: 1 5 1 1 5 5 5 5 5 5 ...
$ Land_Slope : Factor w/ 3 levels "Gtl","Mod","Sev": 1 1 1 1 1 1 1 1 1 1 ...
$ Neighborhood : Factor w/ 29 levels "North_Ames","College_Creek",..: 1 1 1 1 7 7 17 17 17 7 ...
$ Condition_1 : Factor w/ 9 levels "Artery","Feedr",..: 3 2 3 3 3 3 3 3 3 3 ...
$ Condition_2 : Factor w/ 8 levels "Artery","Feedr",..: 3 3 3 3 3 3 3 3 3 3 ...
$ Bldg_Type : Factor w/ 5 levels "OneFam","TwoFmCon",..: 1 1 1 1 1 1 5 5 5 1 ...
$ House_Style : Factor w/ 8 levels "One_and_Half_Fin",..: 3 3 3 3 8 8 3 3 3 8 ...
$ Overall_Qual : Factor w/ 10 levels "Very_Poor","Poor",..: 6 5 6 7 5 6 8 8 8 7 ...
$ Overall_Cond : Factor w/ 10 levels "Very_Poor","Poor",..: 5 6 6 5 5 6 5 5 5 5 ...
$ Year_Built : int [1:2930] 1960 1961 1958 1968 1997 1998 2001 1992 1995 1999 ...
$ Year_Remod_Add : int [1:2930] 1960 1961 1958 1968 1998 1998 2001 1992 1996 1999 ...
$ Roof_Style : Factor w/ 6 levels "Flat","Gable",..: 4 2 4 4 2 2 2 2 2 2 ...
$ Roof_Matl : Factor w/ 8 levels "ClyTile","CompShg",..: 2 2 2 2 2 2 2 2 2 2 ...
$ Exterior_1st : Factor w/ 16 levels "AsbShng","AsphShn",..: 4 14 15 4 14 14 6 7 6 14 ...
$ Exterior_2nd : Factor w/ 17 levels "AsbShng","AsphShn",..: 11 15 16 4 15 15 6 7 6 15 ...
$ Mas_Vnr_Type : Factor w/ 5 levels "BrkCmn","BrkFace",..: 5 4 2 4 4 2 4 4 4 4 ...
$ Mas_Vnr_Area : num [1:2930] 112 0 108 0 0 20 0 0 0 0 ...
$ Exter_Qual : Factor w/ 4 levels "Excellent","Fair",..: 4 4 4 3 4 4 3 3 3 4 ...
$ Exter_Cond : Factor w/ 5 levels "Excellent","Fair",..: 5 5 5 5 5 5 5 5 5 5 ...
$ Foundation : Factor w/ 6 levels "BrkTil","CBlock",..: 2 2 2 2 3 3 3 3 3 3 ...
$ Bsmt_Qual : Factor w/ 6 levels "Excellent","Fair",..: 6 6 6 6 3 6 3 3 3 6 ...
$ Bsmt_Cond : Factor w/ 6 levels "Excellent","Fair",..: 3 6 6 6 6 6 6 6 6 6 ...
$ Bsmt_Exposure : Factor w/ 5 levels "Av","Gd","Mn",..: 2 4 4 4 4 4 3 4 4 4 ...
$ BsmtFin_Type_1 : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 2 6 1 1 3 3 3 1 3 7 ...
$ BsmtFin_SF_1 : num [1:2930] 2 6 1 1 3 3 3 1 3 7 ...
$ BsmtFin_Type_2 : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 7 4 7 7 7 7 7 7 7 7 ...
$ BsmtFin_SF_2 : num [1:2930] 0 144 0 0 0 0 0 0 0 0 ...
$ Bsmt_Unf_SF : num [1:2930] 441 270 406 1045 137 ...
$ Total_Bsmt_SF : num [1:2930] 1080 882 1329 2110 928 ...
$ Heating : Factor w/ 6 levels "Floor","GasA",..: 2 2 2 2 2 2 2 2 2 2 ...
$ Heating_QC : Factor w/ 5 levels "Excellent","Fair",..: 2 5 5 1 3 1 1 1 1 3 ...
$ Central_Air : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
$ Electrical : Factor w/ 6 levels "FuseA","FuseF",..: 5 5 5 5 5 5 5 5 5 5 ...
$ First_Flr_SF : int [1:2930] 1656 896 1329 2110 928 926 1338 1280 1616 1028 ...
$ Second_Flr_SF : int [1:2930] 0 0 0 0 701 678 0 0 0 776 ...
$ Low_Qual_Fin_SF : int [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
$ Gr_Liv_Area : int [1:2930] 1656 896 1329 2110 1629 1604 1338 1280 1616 1804 ...
$ Bsmt_Full_Bath : num [1:2930] 1 0 0 1 0 0 1 0 1 0 ...
$ Bsmt_Half_Bath : num [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
$ Full_Bath : int [1:2930] 1 1 1 2 2 2 2 2 2 2 ...
$ Half_Bath : int [1:2930] 0 0 1 1 1 1 0 0 0 1 ...
$ Bedroom_AbvGr : int [1:2930] 3 2 3 3 3 3 2 2 2 3 ...
$ Kitchen_AbvGr : int [1:2930] 1 1 1 1 1 1 1 1 1 1 ...
$ Kitchen_Qual : Factor w/ 5 levels "Excellent","Fair",..: 5 5 3 1 5 3 3 3 3 3 ...
$ TotRms_AbvGrd : int [1:2930] 7 5 6 8 6 7 6 5 5 7 ...
$ Functional : Factor w/ 8 levels "Maj1","Maj2",..: 8 8 8 8 8 8 8 8 8 8 ...
$ Fireplaces : int [1:2930] 2 0 0 2 1 1 0 0 1 1 ...
$ Fireplace_Qu : Factor w/ 6 levels "Excellent","Fair",..: 3 4 4 6 6 3 4 4 6 6 ...
$ Garage_Type : Factor w/ 7 levels "Attchd","Basment",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Garage_Finish : Factor w/ 4 levels "Fin","No_Garage",..: 1 4 4 1 1 1 1 3 3 1 ...
$ Garage_Cars : num [1:2930] 2 1 1 2 2 2 2 2 2 2 ...
$ Garage_Area : num [1:2930] 528 730 312 522 482 470 582 506 608 442 ...
$ Garage_Qual : Factor w/ 6 levels "Excellent","Fair",..: 6 6 6 6 6 6 6 6 6 6 ...
$ Garage_Cond : Factor w/ 6 levels "Excellent","Fair",..: 6 6 6 6 6 6 6 6 6 6 ...
$ Paved_Drive : Factor w/ 3 levels "Dirt_Gravel",..: 2 3 3 3 3 3 3 3 3 3 ...
$ Wood_Deck_SF : int [1:2930] 210 140 393 0 212 360 0 0 237 140 ...
$ Open_Porch_SF : int [1:2930] 62 0 36 0 34 36 0 82 152 60 ...
$ Enclosed_Porch : int [1:2930] 0 0 0 0 0 0 170 0 0 0 ...
$ Three_season_porch: int [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
$ Screen_Porch : int [1:2930] 0 120 0 0 0 0 0 144 0 0 ...
$ Pool_Area : int [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
$ Pool_QC : Factor w/ 5 levels "Excellent","Fair",..: 4 4 4 4 4 4 4 4 4 4 ...
$ Fence : Factor w/ 5 levels "Good_Privacy",..: 5 3 5 5 3 5 5 5 5 5 ...
$ Misc_Feature : Factor w/ 6 levels "Elev","Gar2",..: 3 3 2 3 3 3 3 3 3 3 ...
$ Misc_Val : int [1:2930] 0 0 12500 0 0 0 0 0 0 0 ...
$ Mo_Sold : int [1:2930] 5 6 6 4 3 6 4 1 3 6 ...
$ Year_Sold : int [1:2930] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
$ Sale_Type : Factor w/ 10 levels "COD","Con","ConLD",..: 10 10 10 10 10 10 10 10 10 10 ...
$ Sale_Condition : Factor w/ 6 levels "Abnorml","AdjLand",..: 5 5 5 5 5 5 5 5 5 5 ...
$ Sale_Price : int [1:2930] 215000 105000 172000 244000 189900 195500 213500 191500 236500 189000 ...
$ Longitude : num [1:2930] -93.6 -93.6 -93.6 -93.6 -93.6 ...
$ Latitude : num [1:2930] 42.1 42.1 42.1 42.1 42.1 ...
- attr(*, "spec")=List of 2
..$ cols :List of 82
.. ..$ Order : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ PID : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ MS SubClass : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ MS Zoning : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Lot Frontage : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Lot Area : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Street : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Alley : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Lot Shape : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Land Contour : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Utilities : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Lot Config : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Land Slope : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Neighborhood : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Condition 1 : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Condition 2 : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Bldg Type : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ House Style : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Overall Qual : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Overall Cond : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Year Built : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Year Remod/Add : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Roof Style : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Roof Matl : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Exterior 1st : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Exterior 2nd : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Mas Vnr Type : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Mas Vnr Area : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Exter Qual : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Exter Cond : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Foundation : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Bsmt Qual : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Bsmt Cond : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Bsmt Exposure : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ BsmtFin Type 1 : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ BsmtFin SF 1 : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ BsmtFin Type 2 : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ BsmtFin SF 2 : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Bsmt Unf SF : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Total Bsmt SF : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Heating : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Heating QC : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Central Air : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Electrical : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ 1st Flr SF : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ 2nd Flr SF : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Low Qual Fin SF: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Gr Liv Area : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Bsmt Full Bath : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Bsmt Half Bath : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Full Bath : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Half Bath : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Bedroom AbvGr : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Kitchen AbvGr : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Kitchen Qual : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ TotRms AbvGrd : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Functional : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Fireplaces : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Fireplace Qu : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Garage Type : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Garage Yr Blt : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Garage Finish : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Garage Cars : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Garage Area : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Garage Qual : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Garage Cond : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Paved Drive : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Wood Deck SF : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Open Porch SF : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Enclosed Porch : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ 3Ssn Porch : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Screen Porch : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Pool Area : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Pool QC : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Fence : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Misc Feature : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Misc Val : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Mo Sold : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Yr Sold : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
.. ..$ Sale Type : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ Sale Condition : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ SalePrice : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
..$ default: list()
.. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
..- attr(*, "class")= chr "col_spec"
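# str() gives you the structure of the data
# Data types - how each variable is stored in R (num, int, Factor, ...)
# Levels are the categories of a factor
# to pull out a single variable, use the data set name, then $, then the variable name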
min(ames$Lot_Frontage)
[1] 0
max(ames$Lot_Frontage)
[1] 313
ames
range(ames$Lot_Area)
[1] 1300 215245
mean(ames$Lot_Area)
[1] 10147.92
median(ames$Lot_Area)
[1] 9436.5
## can also use quantile function
quantile(ames$Lot_Area, 0.5)
50%
9436.5
quantile(ames$Lot_Area, 0.25)
25%
7440.25
# quantile() works for any quantile, not just the quartiles
# IQR() gives the difference between the first and third quartiles
IQR(ames$Lot_Area)
[1] 4115
sd(ames$Lot_Area)
[1] 7880.018
var(ames$Lot_Area)
[1] 62094680
## can also apply to multiple columns
# can also select the columns by name: ames[, c("Lot_Frontage", "Lot_Area")]
lapply(ames[, 3:4], sd)
$Lot_Frontage
[1] 33.49944
$Lot_Area
[1] 7880.018
# remember that lapply() applies a function to each element of the data (here, each column) --> i.e. lapply(data you want, function you want)
summary(ames[, 3:4])
Lot_Frontage Lot_Area
Min. : 0.00 Min. : 1300
1st Qu.: 43.00 1st Qu.: 7440
Median : 63.00 Median : 9436
Mean : 57.65 Mean : 10148
3rd Qu.: 78.00 3rd Qu.: 11555
Max. :313.00 Max. :215245
Unfortunately, base R has no built-in function to find the statistical mode of a variable (the existing mode() function returns how an object is stored, not its most frequent value). Here’s one way to do it.
# Tally how many times each unique lot area occurs
table_ames <- table(ames$Lot_Area)
# Reorder the tally from the most frequent value to the least frequent
table_ames[order(table_ames, decreasing = TRUE)]
# The printout is long, but its first entry shows that the mode for lot area is 9600
summary(ames$Bldg_Type)
OneFam TwoFmCon Duplex Twnhs TwnhsE
2425 62 109 101 233
# summary() on a factor gives you the number of observations in each level
# let's say we want to count the number of observations in each level of Paved_Drive
summary(ames$Paved_Drive)
Dirt_Gravel Partial_Pavement Paved
216 62 2652
# another way to count the number of paved
sum(ames$Paved_Drive == "Paved")
[1] 2652
# or condition
sum(ames$Paved_Drive == "Paved" | ames$Paved_Drive == "Dirt_Gravel")
[1] 2868
# and condition
sum(ames$Paved_Drive == "Paved" & ames$Paved_Drive == "Dirt_Gravel")
[1] 0
sum(ames$Paved_Drive == "Paved" & ames$Alley == "No_Alley_Access")
[1] 2518
# find the number of lots greater than 1000
sum(ames$Lot_Area > 1000, na.rm=TRUE)
[1] 2930
# na.rm = TRUE removes missing values for you
# find the number of lots between 1000 and 2000
sum(ames$Lot_Area > 1000 & ames$Lot_Area < 2000, na.rm=TRUE)
[1] 57
library(dplyr)
# Flag lots whose area is strictly between 10000 and 20000 (1 = yes, 0 = no),
# then keep only the flagged rows.
#
# Pitfall: select() chooses COLUMNS, and where() expects a predicate function
# that is applied to each column (e.g. where(is.numeric)). Writing
#   select(filtered_area, where(filtered_area == 1))
# therefore fails with "object 'filtered_area' not found".
# Use filter() to subset ROWS by a condition instead.
#
# To move the new column to the front, append:
#   %>% select(filtered_area, everything())
ames2 <- ames %>%
  mutate(filtered_area = ifelse(Lot_Area > 10000 & Lot_Area < 20000, 1, 0)) %>%
  filter(filtered_area == 1)
ames2
# ames2 keeps only the rows that meet the condition; the new filtered_area variable sits at the end of the data set
barplot(table(ames$Lot_Shape)) # table() is mandatory
# really basic automatic barplot in R
# more fancy, customizable barplot in R
library(ggplot2)
# Bar chart of the number of houses per lot shape, one fill color per shape
ggplot(data = ames, mapping = aes(x = Lot_Shape, fill = Lot_Shape)) +
  geom_bar() +
  labs(
    title = "Lot Shape of Houses in Ames",
    x = "Lot Shape",
    y = "Number of Houses"
  )
NA
# Line plot of lot area in row order ("l" draws lines instead of points)
plot(ames$Lot_Area, type = "l")
# Histogram of lot frontage with base graphics
hist(ames$Lot_Frontage)
# Same histogram in ggplot2; `bins` controls the number of bins
ggplot(ames, aes(x = Lot_Frontage)) +
  geom_histogram(bins = 15)
# Box plot of lot frontage on its own
boxplot(ames$Lot_Frontage)
# Side-by-side comparison of a numerical variable across a categorical one
boxplot(ames$Lot_Frontage ~ ames$Alley)
# Same comparison in ggplot2
ggplot(ames, aes(x = Alley, y = Lot_Frontage)) +
  geom_boxplot()
ames
# Scatter plot of Gr_Liv_Area against Lot_Area with base graphics
plot(ames$Lot_Area, ames$Gr_Liv_Area)
# Same scatter plot in ggplot2.
# Inside aes(), refer to columns by their bare names: ggplot looks them up in
# the data passed to ggplot(). Writing ames$Lot_Area inside aes() bypasses that
# data argument, labels the axes "ames$Lot_Area", and triggers ggplot2's
# "Use of `ames$Lot_Area` is discouraged" warning.
ggplot(ames) +
  aes(x = Lot_Area, y = Gr_Liv_Area) +
  geom_point()
# add in a categorical factor (mapped to point color) for more info
ggplot(ames) +
  aes(x = Lot_Area, y = Gr_Liv_Area, color = Alley) +
  geom_point() +
  scale_color_hue()
# Draw points on the qq-plot:
qqnorm(ames$Lot_Area)
# Draw the reference line:
qqline(ames$Lot_Area)
We can see that the points deviate substantially from the reference line (where normally distributed data would fall), so the normality assumption is violated for Lot_Area.
plot(density(ames$Lot_Area))